4 Genomic data

4.1 Read duplicates

all_data %>%
    select(dataset,Extraction,duplicates,Taxon) %>%
    unique() %>%
    group_by(Taxon,Extraction) %>%
    summarise(value = sprintf("%.1f±%.1f", mean(duplicates), sd(duplicates))) %>%
    pivot_wider(names_from = Extraction, values_from = value) %>%
    tt(caption = "Mean and standard deviation of fraction of duplicated reads")
tinytable_5mvon2r04tnisfgjj3n9
Mean and standard deviation of fraction of duplicated reads
Taxon ZYMO DREX EHEX
Amphibian 0.3±0.2 0.2±0.2 0.2±0.2
Reptile 0.5±0.4 0.3±0.3 0.4±0.4
Mammal 0.4±0.4 0.2±0.1 0.2±0.2
Bird 0.8±0.3 0.9±0.1 0.6±0.4
Control 1.0±0.0 0.9±0.0 1.0±0.0
all_data %>%
    select(dataset,Extraction,duplicates,Taxon, Species) %>%
    mutate(duplicates=duplicates*100) %>%
    unique() %>%
    ggplot(aes(x=Extraction,y=duplicates, color=Species, group=Extraction)) + 
        scale_y_reverse() +
        geom_boxplot(outlier.shape = NA, fill="#f4f4f4", color="#8c8c8c") + 
        geom_jitter() + 
        scale_color_manual(values=vertebrate_colors) +
        facet_grid(. ~ Taxon, scales = "free") +
        theme_minimal() +
        labs(y="Duplication rate (%)",x="Extraction method")

all_data %>%
    select(dataset,Sample,Species,Extraction,duplicates,Taxon) %>%
    filter(Taxon != "Control") %>%
    lmerTest::lmer(duplicates ~ Extraction + (1 | Sample) + (1 | Species), data = ., REML = FALSE) %>%
    broom.mixed::tidy() %>%
    tt()
tinytable_b64lszpfb3xe8cj6wfa5
effect group term estimate std.error statistic df p.value
fixed NA (Intercept) 0.48640693 0.07257915 6.701744 14.33359 8.937456e-06
fixed NA ExtractionDREX -0.09279626 0.03900562 -2.379048 160.72377 1.853073e-02
fixed NA ExtractionEHEX -0.14618630 0.03918499 -3.730671 160.73104 2.644423e-04
ran_pars Sample sd__(Intercept) 0.00000000 NA NA NA NA
ran_pars Species sd__(Intercept) 0.23148121 NA NA NA NA
ran_pars Residual sd__Observation 0.21005171 NA NA NA NA

4.2 Depth of coverage

all_data %>%
    select(dataset,Extraction,coverage_depth,Taxon) %>%
    unique() %>%
    group_by(Taxon,Extraction) %>%
    summarise(value = sprintf("%.1f±%.1f", mean(coverage_depth), sd(coverage_depth))) %>%
    pivot_wider(names_from = Extraction, values_from = value) %>%
    tt(caption = "Mean and standard deviation of fraction of duplicated reads")
tinytable_uu6t9qkg0vg7eahm5rup
Mean and standard deviation of fraction of duplicated reads
Taxon ZYMO DREX EHEX
Amphibian 0.0±0.0 0.0±0.0 0.0±0.0
Reptile 0.2±0.3 0.1±0.1 0.1±0.1
Mammal 0.7±1.2 0.3±0.4 0.5±1.0
Bird 0.4±0.8 0.6±0.5 0.8±0.8
Control 0.0±0.0 0.0±0.0 0.0±0.0
all_data %>%
    select(dataset,Extraction,coverage_depth,Taxon, Species) %>%
    unique() %>%
    ggplot(aes(x=Extraction,y=coverage_depth, color=Species, group=Extraction)) + 
        geom_boxplot(outlier.shape = NA, fill="#f4f4f4", color="#8c8c8c") + 
        geom_jitter() + 
        scale_color_manual(values=vertebrate_colors) +
        facet_grid(. ~ Taxon, scales = "free") +
        theme_minimal() +
        labs(y="Depth of coverage",x="Extraction method")

all_data %>%
    select(dataset,Sample,Species,Extraction,coverage_depth,Taxon) %>%
    unique() %>%
    filter(Taxon != "Control") %>%
    lmerTest::lmer(coverage_depth ~ Extraction + (1 | Sample) + (1 | Species), data = ., REML = FALSE) %>%
    broom.mixed::tidy() %>%
    tt()
tinytable_pv8megf2wmkcswl4bmm2
effect group term estimate std.error statistic df p.value
fixed NA (Intercept) 0.329125000 0.1331063 2.47264850 20.58494 0.02222872
fixed NA ExtractionDREX -0.089791667 0.1144640 -0.78445305 48.00000 0.43662873
fixed NA ExtractionEHEX -0.002791667 0.1144640 -0.02438903 48.00000 0.98064341
ran_pars Sample sd__(Intercept) 0.408634577 NA NA NA NA
ran_pars Species sd__(Intercept) 0.224731208 NA NA NA NA
ran_pars Residual sd__Observation 0.396515070 NA NA NA NA

4.3 Breadth of coverage

all_data %>%
    select(dataset,Extraction,coverage_breadth,Taxon) %>%
    unique() %>%
    group_by(Taxon,Extraction) %>%
    summarise(value = sprintf("%.1f±%.1f", mean(coverage_breadth), sd(coverage_breadth))) %>%
    pivot_wider(names_from = Extraction, values_from = value) %>%
    tt(caption = "Mean and standard deviation of depth of host genome coverage")
tinytable_a5sije3mccub4l4hgex0
Mean and standard deviation of depth of host genome coverage
Taxon ZYMO DREX EHEX
Amphibian 0.0±0.0 0.0±0.0 0.0±0.0
Reptile 4.9±7.5 3.0±5.4 2.9±3.7
Mammal 5.7±5.9 10.2±16.4 15.1±26.4
Bird 0.6±0.5 3.2±4.4 8.9±13.9
Control 0.0±0.0 0.0±0.0 0.0±0.0
all_data %>%
    select(dataset,Extraction,coverage_breadth,Taxon,Species) %>%
    unique() %>%
    ggplot(aes(x=Extraction,y=coverage_breadth, color=Species, group=Extraction)) + 
        geom_boxplot(outlier.shape = NA, fill="#f4f4f4", color="#8c8c8c") + 
        geom_jitter() + 
        scale_color_manual(values=vertebrate_colors) +
        facet_grid(. ~ Taxon, scales = "free") +
        theme_minimal() +
        labs(y="Breadth of coverage (%)",x="Extraction method")

all_data %>%
    select(dataset,Extraction,Sample,Species,coverage_breadth,Taxon) %>%
    unique() %>%
    filter(Taxon != "Control") %>%
    lmerTest::lmer(coverage_breadth ~ Extraction + (1 | Sample) + (1 | Species), data = ., REML = FALSE) %>%
    broom.mixed::tidy() %>%
    tt()
tinytable_g1wp43v3o9bqvg6g0jfa
effect group term estimate std.error statistic df p.value
fixed NA (Intercept) 2.799167 2.252826 1.2425133 20.82558 0.22785670
fixed NA ExtractionDREX 1.301250 1.956426 0.6651158 48.00000 0.50915975
fixed NA ExtractionEHEX 3.918750 1.956426 2.0030144 48.00000 0.05084021
ran_pars Sample sd__(Intercept) 7.118462 NA NA NA NA
ran_pars Species sd__(Intercept) 3.549767 NA NA NA NA
ran_pars Residual sd__Observation 6.777259 NA NA NA NA